--- title: OCO2 - Reproduce F. Chevallier's figures keywords: fastai sidebar: home_sidebar ---
!apt-get install libgeos-3.5.0
!apt-get install libgeos-dev
!pip install https://github.com/matplotlib/basemap/archive/master.zip
Using Data from OCO-2 Satellite, issued by the NASA.
We here try to reproduce the results from this paper by F. Chevallier, trying to "[observe] carbon dioxide emissions over China's cities with the Orbiting Carbon Observatory-2".
//TODO: Explanation
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.basemap import Basemap #Imported directly from the github repository
Parameters:
def draw_map(data, x="longitude", y="latitude", c="xco2", lon_min=-180, lon_max=180, lat_min=-90, lat_max=90, size_point=1, frontier=False):
plt.figure(figsize=(15, 10), edgecolor='w')
m = Basemap(llcrnrlat=lat_min, urcrnrlat=lat_max, llcrnrlon=lon_min, urcrnrlon=lon_max)
m.shadedrelief()
parallels = np.arange(-80.,81,10.)
m.drawparallels(parallels,labels=[False,True,True,False])
meridians = np.arange(10.,351.,20.)
m.drawmeridians(meridians,labels=[True,False,False,True])
normal = matplotlib.colors.LogNorm(vmin=data[c].min(), vmax=data[c].max())
m.scatter(data[x], data[y], c=data[c], cmap=plt.cm.jet, s=size_point, norm=normal)
if (frontier):
m.drawcountries(linewidth=0.5)
m.drawcoastlines(linewidth=0.7)
plt.show()
Parameters:
Return:
from math import radians, cos, sin, asin, sqrt
def haversine_formula(lon1, lat1, lon2, lat2):
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(sqrt(a))
r = 6371 # Radius of earth in kilometers. Use 3956 for miles
return c * r
def haversine(row, lat, lon):
return haversine_formula(lon, lat, row['longitude'], row['latitude'])
Sample data can be accessed freely on the NASA Database, among other open data from several NASA sattelites.
We will be using CSV aggregated by Benoit Courty here.
data_1610 = pd.read_csv("http://courty.fr/OCO2/oco2_1610.csv", sep=";")
data_1705 = pd.read_csv("http://courty.fr/OCO2/oco2_1705.csv", sep=";")
data_1803 = pd.read_csv("http://courty.fr/OCO2/oco2_1803.csv", sep=";")
data_1805 = pd.read_csv("http://courty.fr/OCO2/oco2_1805.csv", sep=";")
data_1808 = pd.read_csv("http://courty.fr/OCO2/oco2_1808.csv", sep=";")
data_1809 = pd.read_csv("http://courty.fr/OCO2/oco2_1809.csv", sep=";")
data_1610.head()
data_1705.describe()
To convert the sounding_id into a datetime variable data:
from datetime import datetime
def to_date(a):
return datetime.strptime(str(a), '%Y%m%d%H%M%S%f')
data_1610['date'] = data_1610['sounding_id'].apply(to_date)
data_1705['date'] = data_1705['sounding_id'].apply(to_date)
data_1803['date'] = data_1803['sounding_id'].apply(to_date)
data_1805['date'] = data_1805['sounding_id'].apply(to_date)
data_1808['date'] = data_1808['sounding_id'].apply(to_date)
data_1809['date'] = data_1809['sounding_id'].apply(to_date)
data_1803.head()
We are seaking the emission peaks taken as an example in the annexes of F. Chevallier's article Observing carbon dioxide emissions over China's cities with the Orbiting Carbon Observatory-2:
# We consider the October 2016 datset at the right day
data_1610_1017 = data_1610[data_1610['date'] < "2016-10-18"]
data_1610_1017 = data_1610_1017[data_1610_1017['date'] > "2016-10-17"]
draw_map(data_1610_1017)
# We consider the orgit going over East China
data_anshan = data_1610_1017[data_1610_1017['longitude'] > 120]
data_anshan = data_anshan[data_anshan['longitude'] < 130]
# We also spot an unwanted datapoint over Russia (point number 1613182)
data_anshan = data_anshan.drop([1613182])
data_anshan
# We retrieve the Figure 2.A
draw_map(data_anshan, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_anshan, lon_min=122, lon_max=123.7, lat_min=40, lat_max=41.7, frontier=True, size_point=5)
# We restrict the data to the window studied
data_anshan_restrict = data_anshan.query('longitude>122 and longitude<123.7 and latitude>40 and latitude<41.7')
lon_min = data_anshan_restrict['longitude'].iloc[0]
lat_min = data_anshan_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_anshan_restrict['distance'] = data_anshan_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_anshan_restrict
data_anshan_restrict.plot.scatter(x='distance', y='xco2')
# We consider the May 2018 datset at the right day
data_1805_17 = data_1805[data_1805['date'] < "2018-05-18"]
data_1805_17 = data_1805_17[data_1805_17['date'] > "2018-05-17"]
draw_map(data_1805_17)
# We consider the orgit going over East China (retrieved with the coordinates first, then with the orbit id)
data_baotou = data_1805_17[data_1805_17['orbit'] == 20605]
data_baotou
# We retrieve the Figure 2.A
draw_map(data_baotou, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_baotou, lon_min=109, lon_max=110.7, lat_min=39.7, lat_max=41.4, frontier=True, size_point=5)
# We restrict the data to the window studied
data_baotou_restrict = data_baotou.query('longitude>109 and longitude<110.7and latitude>39.7 and latitude<41.4')
lon_min = data_anshan_restrict['longitude'].iloc[0]
lat_min = data_anshan_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_baotou_restrict['distance'] = data_baotou_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_baotou_restrict
data_baotou_restrict.plot.scatter(x='distance', y='xco2')
# We consider the September 2018 datset at the right day
data_1809_24 = data_1809[data_1809['date'] < "2018-09-25"]
data_1809_24 = data_1809_24[data_1809_24['date'] > "2018-09-24"]
draw_map(data_1809_24)
# We consider the orgit going over East China
#data_dezhou = data_1809_24[data_1809_24['longitude'] > 115]
#data_dezhou = data_dezhou[data_dezhou['longitude'] < 125]
data_dezhou = data_1809_24[data_1809_24['orbit'] == 22498]
data_dezhou
# We retrieve the Figure 2.A
draw_map(data_dezhou, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_dezhou, lon_min=115.3, lon_max=117.1, lat_min=36.5, lat_max=38.3, frontier=True, size_point=5)
# We restrict the data to the window studied
data_dezhou_restrict = data_dezhou.query('longitude>115.3 and longitude<117.1 and latitude>36.5 and latitude<38.5')
lon_min = data_dezhou_restrict['longitude'].iloc[0]
lat_min = data_dezhou_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_dezhou_restrict['distance'] = data_dezhou_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_dezhou_restrict
data_dezhou_restrict.plot.scatter(x='distance', y='xco2')
# We consider the August 2018 datset at the right day
data_1808_25 = data_1808[data_1808['date'] < "2018-08-26"]
data_1808_25 = data_1808_25[data_1808_25['date'] > "2018-08-25"]
draw_map(data_1808_25)
# We consider the orgit going over East China
#data_laiwu = data_1808_25[data_1808_25['longitude'] > 110]
#data_laiwu = data_laiwu[data_laiwu['longitude'] < 125]
data_laiwu = data_1808_25[data_1808_25['orbit'] == 22061]
data_laiwu
# We retrieve the Figure 2.A
draw_map(data_laiwu, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_laiwu, lon_min=116.5, lon_max=118.2, lat_min=35.4, lat_max=37.1, frontier=True, size_point=5)
# We restrict the data to the window studied
data_laiwu_restrict = data_laiwu.query('longitude>116.5 and longitude<118.2 and latitude>35.4 and latitude<37.1')
lon_min = data_laiwu_restrict['longitude'].iloc[0]
lat_min = data_laiwu_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_laiwu_restrict['distance'] = data_laiwu_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_laiwu_restrict
data_laiwu_restrict.plot.scatter(x='distance', y='xco2')
# We consider the March 2018 datset at the right day
data_1803_09 = data_1803[data_1803['date'] < "2018-03-10"]
data_1803_09 = data_1803_09[data_1803_09['date'] > "2018-03-09"]
draw_map(data_1803_09)
# We consider the orgit going over East China
#data_nanjing = data_1803_09[data_1803_09['longitude'] > 115]
#data_nanjing = data_nanjing[data_nanjing['longitude'] < 125]
data_nanjing = data_1803_09[data_1803_09['orbit'] == 19600]
data_nanjing
# We retrieve the Figure 2.A
draw_map(data_nanjing, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_nanjing, lon_min=118.3, lon_max=120, lat_min=31.3, lat_max=33, frontier=True, size_point=5)
# We restrict the data to the window studied
data_nanjing_restrict = data_nanjing.query('longitude>118.3 and longitude<120 and latitude>31.3 and latitude<33')
lon_min = data_nanjing_restrict['longitude'].iloc[0]
lat_min = data_nanjing_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_nanjing_restrict['distance'] = data_nanjing_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_nanjing_restrict
data_nanjing_restrict.plot.scatter(x='distance', y='xco2')
# We consider the August 2018 datset at the right day
data_1705_18 = data_1705[data_1705['date'] < "2017-05-19"]
data_1705_18 = data_1705_18[data_1705_18['date'] > "2017-05-18"]
draw_map(data_1705_18)
# We consider the orgit going over East China
#data_tangshan = data_1705_18[data_1705_18['longitude'] > 110]
#data_tangshan = data_tangshan[data_tangshan['longitude'] < 125]
data_tangshan = data_1705_18[data_1705_18['orbit'] == 15304]
data_tangshan
# We retrieve the Figure 2.A
draw_map(data_tangshan, lon_min=70, lon_max=140, lat_min=15, lat_max=55, frontier=True, size_point=5)
# We represent the observation zoomed on Anshan
draw_map(data_tangshan, lon_min=117.8, lon_max=119.1, lat_min=39.2, lat_max=40.6, frontier=True, size_point=5)
# We restrict the data to the window studied
data_tangshan_restrict = data_tangshan.query('longitude>117.8 and longitude<119.1 and latitude>39.2 and latitude<40.6')
lon_min = data_tangshan_restrict['longitude'].iloc[0]
lat_min = data_tangshan_restrict['latitude'].iloc[0]
# We create a 'distance' column considering th distance of the measure to the minimal longitude and latitude
data_tangshan_restrict['distance'] = data_tangshan_restrict.apply(lambda row: haversine(row, lon_min, lat_min), axis=1)
data_tangshan_restrict
data_tangshan_restrict.plot.scatter(x='distance', y='xco2')